load("/home/rstudio/22-11-23_IfnKO-integration.Rdata")

volcano_ylims <- c(0, 25)
volcano_xlims <- c(-1, 1)
dnmt_df %>% 
  dplyr::filter(experiment != "naive (Ifnagr KO)" & !is.na(meth)) %>% 
  ggplot(aes(x = tissue, y = meth, color=tissue, group=tissue)) +
  rasterise(ggbeeswarm::geom_quasirandom(size=.2)) +
  stat_summary(fun.data=median_hilow, geom="errorbar", width=0, size=.5, color="black", fun.args=c(conf.int=.5)) +
  stat_summary(fun=median, geom="point", size=2, color="black") +
  scale_y_continuous(labels=scales::percent_format(accuracy=1L), expand=c(.005,.005)) +
  scale_color_manual(values=c("vSVZ"="#0047b9", "striatum"="#78b2ff")) +
  facet_wrap(~experiment, nrow=1) +
  labs(x = "", y = d$label) +
  theme(panel.grid = element_blank(), legend.position = "none",
        panel.border = element_rect(colour="black", fill=NA, size=1),
        axis.text.x = element_text(angle=45, vjust=1, hjust=1))

ggplot2::ggsave("/home/rstudio/22-12-07_Dnmt3a-meth-beeswarm.png", width=12, height=7, units="cm", dpi=500)
ggplot2::ggsave("/home/rstudio/22-12-07_Dnmt3a-meth-beeswarm.svg", width=12, height=7, units="cm", fix_text_size=F)
tmp <- quant_df_all %>% 
  mutate(cell_id_dna = unname(cellname_to_dna_id[sample])) %>% 
  dplyr::filter(experiment %in% c("naive", "2 dpi", "21 dpi", "2 dpi (Ifnagr KO)") &
                method=="scNMT" &
                tissue == "vSVZ" &
                !is.na(cell_id_dna) &
                cell_id_dna %in% rownames(dmr21dpi_mtx)) %>% 
  dplyr::filter(!(experiment == "naive" & is.na(ptime))) %>%  # remove naive non-lineage cells
  group_by(experiment) %>% 
  mutate(n_bins = floor(n() / cells_per_small_bin)) %>%  # this ensures we get about 10-12 cells per bin
  ungroup() %>% 
  mutate(n_bins = if_else(n_bins < 1, 1, n_bins), # bins of size 0 don't make sense, need at least 1 bin
         ptime = if_else(is.na(ptime), rnorm(mean=-100, n=n()), ptime))

# cut_number doesnt work with group_by, so I group manually:
quant_df_binned_svz <- bind_rows(
  tmp %>%
    dplyr::filter(experiment == "naive") %>% 
    mutate(ptime_bin = paste0("n_", ggplot2::cut_number(ptime, n=n_bins, labels=F))) %>% 
    arrange(ptime_bin),
  tmp %>%
    dplyr::filter(experiment == "2 dpi") %>% 
    mutate(ptime_bin = paste0("i2_", ggplot2::cut_number(ptime, n=n_bins, labels=F))) %>% 
    arrange(ptime_bin),
  tmp %>%
    dplyr::filter(experiment == "21 dpi") %>% 
    mutate(ptime_bin = paste0("i21_", ggplot2::cut_number(ptime, n=n_bins, labels=F))) %>% 
    arrange(ptime_bin),
  tmp %>%
    dplyr::filter(experiment == "2 dpi (Ifnagr KO)") %>% 
    mutate(ptime_bin = paste0("ko_", ggplot2::cut_number(ptime, n=n_bins, labels=F))) %>% 
    arrange(ptime_bin)
) %>% 
  group_by(ptime_bin) %>%
  add_count(celltype_tissue, name="n") %>%
  mutate(most_common = celltype_tissue[n == max(n)][1]) %>%  # the most common celltype in that bin
  ungroup() %>% 
  arrange(experiment, ptime)
## Warning in seq.default(0, 1, length.out = nbins + 1): first element used of
## 'length.out' argument

## Warning in seq.default(0, 1, length.out = nbins + 1): first element used of
## 'length.out' argument

## Warning in seq.default(0, 1, length.out = nbins + 1): first element used of
## 'length.out' argument

## Warning in seq.default(0, 1, length.out = nbins + 1): first element used of
## 'length.out' argument
tmp <- quant_df_all %>% 
  mutate(cell_id_dna = unname(cellname_to_dna_id[sample])) %>% 
  dplyr::filter(experiment %in% c("naive", "2 dpi", "21 dpi", "2 dpi (Ifnagr KO)") &
                method=="scNMT" &
                tissue == "striatum" &
                !is.na(cell_id_dna) &
                cell_id_dna %in% rownames(dmr21dpi_mtx)) %>% 
  dplyr::filter(!(experiment == "naive" & is.na(ptime))) %>%  # remove naive non-lineage cells
  group_by(experiment) %>% 
  mutate(n_bins = floor(n() / cells_per_small_bin)) %>%  # this ensures we get about 10-12 cells per bin
  ungroup() %>% 
  mutate(n_bins = if_else(n_bins < 1, 1, n_bins), # bins of size 0 don't make sense, need at least 1 bin
         ptime = if_else(is.na(ptime), rnorm(mean=-100, n=n()), ptime))

# cut_number doesnt work with group_by, so I group manually:
quant_df_binned_str <- bind_rows(
  tmp %>%
    dplyr::filter(experiment == "naive") %>% 
    mutate(ptime_bin = paste0("n_", ggplot2::cut_number(ptime, n=n_bins, labels=F))) %>% 
    arrange(ptime_bin),
  tmp %>%
    dplyr::filter(experiment == "2 dpi") %>% 
    mutate(ptime_bin = paste0("i2_", ggplot2::cut_number(ptime, n=n_bins, labels=F))) %>% 
    arrange(ptime_bin),
  tmp %>%
    dplyr::filter(experiment == "21 dpi") %>% 
    mutate(ptime_bin = paste0("i21_", ggplot2::cut_number(ptime, n=n_bins, labels=F))) %>% 
    arrange(ptime_bin),
  tmp %>%
    dplyr::filter(experiment == "2 dpi (Ifnagr KO)") %>% 
    mutate(ptime_bin = paste0("ko_", ggplot2::cut_number(ptime, n=n_bins, labels=F))) %>% 
    arrange(ptime_bin)
) %>% 
  group_by(ptime_bin) %>%
  add_count(celltype_tissue, name="n") %>%
  mutate(most_common = celltype_tissue[n == max(n)][1]) %>%  # the most common celltype in that bin
  ungroup() %>% 
  arrange(experiment, ptime)
## Warning in seq.default(0, 1, length.out = nbins + 1): first element used of
## 'length.out' argument

## Warning in seq.default(0, 1, length.out = nbins + 1): first element used of
## 'length.out' argument

## Warning in seq.default(0, 1, length.out = nbins + 1): first element used of
## 'length.out' argument

## Warning in seq.default(0, 1, length.out = nbins + 1): first element used of
## 'length.out' argument
rm(tmp)

DMRs found in the lineage after 2 days of ischemia

g1 <- lmr_df %>% 
  dplyr::filter(treatment == "naive" & celltype_tissue %in% c("qNSC2", "aNSC", "TAP") &
                tissue == "vSVZ" & genotype == "wt" & !is.na(meth_score)) %>% 
  pull(sample)

g2 <- lmr_df %>% 
  dplyr::filter(treatment == "2 dpi" & celltype_tissue %in% c("qNSC2", "aNSC", "TAP") &
                tissue == "vSVZ" & genotype == "wt" & !is.na(meth_score)) %>% 
  pull(sample)

lmr_df %>% 
  dplyr::filter(genotype != "Ifnagr KO") %>% 
  slice_sample(prop=1) %>%  # randomize overplotting ...
  arrange(!is.na(meth_score)) %>% # ... but put informative cells on top
  mutate(selection = case_when(
    is.na(meth_score) ~ "NA",
    sample %in% g1 ~ "g1",
    sample %in% g2 ~ "g2",
    T ~ "-"
  )) %>% 
  ggplot(mapping=aes(x=UMAP_1, y=UMAP_2, fill=selection, color=selection)) +
  geom_point_rast(size=.2, data=integrated_df_10X, color="gray", fill="gray") +  # 0.1
  geom_point_rast(size=1.5, stroke=.2, shape=21) +  # 0.5
  facet_grid(tissue ~ experiment) +
  coord_fixed() +
  labs(x = "UMAP1", y = "UMAP2") +
  scale_fill_manual(values=c("g1"="red", "g2"="blue", "NA"="gray40", "-"="black")) +
  scale_color_manual(values=c("g1"="black", "g2"="black", "NA"="gray40", "-"="black")) +
  theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
        plot.background = element_blank(), strip.background = element_blank(),
        panel.border = element_rect(colour="black", fill=NA, size=1))

p_selection <- lmr_df %>% 
  dplyr::filter(tissue == "vSVZ" & experiment %in% c("naive", "2 dpi")) %>% 
  slice_sample(prop=1) %>%  # randomize overplotting ...
  arrange(!is.na(meth_score)) %>% # ... but put informative cells on top
  mutate(selection = case_when(
    is.na(meth_score) ~ "NA",
    sample %in% g1 ~ "g1",
    sample %in% g2 ~ "g2",
    T ~ "-"
  )) %>% 
  ggplot(mapping=aes(x=UMAP_1, y=UMAP_2, fill=selection, color=selection)) +
  geom_point_rast(size=.2, data=integrated_df_10X, color="gray", fill="gray") +  # 0.1
  geom_point_rast(size=1.5, stroke=.2, shape=21) +  # 0.5
  facet_grid(tissue ~ experiment) +
  coord_fixed() +
  labs(x = "UMAP1", y = "UMAP2") +
  scale_fill_manual(values=c("g1"="red", "g2"="blue", "NA"="gray40", "-"="black")) +
  scale_color_manual(values=c("g1"="black", "g2"="black", "NA"="gray40", "-"="black")) +
  theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
        plot.background = element_blank(), strip.background = element_blank(),
        panel.border = element_rect(colour="black", fill=NA, size=1), legend.position="none")

p_selection

# Write this file as input to scbs scan
# bind_rows(
#   dna_info %>% dplyr::filter(sample %in% g1) %>% mutate(group = "q2toTAP_naive"),
#   dna_info %>% dplyr::filter(sample %in% g2) %>% mutate(group = "q2toTAP_2dpi")
# ) %>% 
#   mutate(cell_id_dna = paste0(cell_id_dna, "_R1R2_dedup.NOMe.CpG")) %>% 
#   dplyr::select(cell_id_dna, group) %>% 
#   write_csv("/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/q2toTAP-naive-vs-2dpi_labels.csv",
#             col_names = F)
dmr_df <- read_tsv("/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/diff_q2toTAP-naive-vs-2dpi_bw2000_ss100_t0.05_mc6_annotated.tsv",
                   show_col_types=F)

dmr_df_labels <- dmr_df %>%
  dplyr::filter(padj < .05 & overlaps_gene) %>% # n_obs_fg > 25 & n_obs_bg > 25 & 
  slice_min(p, n=10) %>% 
  arrange(p)

p_volcano <- dmr_df %>%
  dplyr::filter(overlaps_gene) %>% # n_obs_fg > 25 & n_obs_bg > 25 & 
  ggplot(aes(x = -diff, y = -log10(p), color = padj < .05, label = gene_symbol)) +
  geom_point_rast(size=.3) +
  geom_text_repel(data=dmr_df_labels, max.overlaps=Inf, size=3, color="black",
                  segment.angle=20, segment.curvature=-0.1, segment.ncp=3, segment.color="gray50", segment.size=.3) +
  scale_x_continuous(labels = scales::percent_format(accuracy=1), minor_breaks = NULL,
                     limits=volcano_xlims) +
  scale_y_continuous(limits=volcano_ylims) +
  labs(x = "difference in DNA methylation", y = "-log10(p-value)", color = "") +
  theme(panel.grid.minor = element_blank(), panel.border = element_rect(colour="black", fill=NA, size=1),
        legend.position = "none")

p_volcano
## Warning: Removed 7 rows containing missing values (geom_point).

hm_df <- dmr_df %>% 
  dplyr::filter(padj < .05) %>% 
  arrange(direction)

dmr_mtx <- data.table::fread(
    "/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/diff_q2toTAP-naive-vs-2dpi_bw2000_ss100_t0.05_mc6_a0.1-matrices/methylation_fractions.csv.gz",
    data.table = F) %>%
  column_to_rownames(var="V1") %>% 
  as.matrix() %>% 
  magrittr::set_rownames(str_remove(rownames(.), "_R1R2_dedup.NOMe.[CG]p[CG]")) %>% 
  magrittr::extract(rownames(.) %in% names(dna_id_to_cellname), ) %>% 
  magrittr::set_rownames(unname(dna_id_to_cellname[rownames(.)])) %>% 
  magrittr::extract( , hm_df$dmr)

bin_df <- quant_df_binned_str %>% 
  distinct(ptime_bin, experiment, most_common)

bin_ctypes <- bin_df %>%
  pull(most_common)

ha <- HeatmapAnnotation(celltype = bin_ctypes,
                        col = list(celltype = mycolorscheme2),
                        show_legend=F,
                        simple_anno_size=unit(2,"mm"),
                        show_annotation_name=F)

meth_mtx_ptime_bins <- sapply(bin_df$ptime_bin, function(b) {
  idx <- dplyr::filter(quant_df_binned_str, ptime_bin == b) %>% pull(sample) %>% unique()
  colMeans(dmr_mtx[idx, hm_df$dmr], na.rm=T)
}) %>%
  magrittr::set_rownames(hm_df$dmr) %>% 
  magrittr::set_colnames(bin_df$ptime_bin)

hm_str <- meth_mtx_ptime_bins %>%
  t() %>% scale() %>% t() %>%  
  Heatmap(cluster_rows=F, show_row_dend=F, cluster_row_slices=F, cluster_columns=F, use_raster=F,
          show_row_names=F, show_column_names=F,
          name="meth", bottom_annotation=ha,
          width=unit(ncol(meth_mtx_ptime_bins) / 7, "cm"), height=unit(nrow(meth_mtx_ptime_bins) / 25, "cm"), show_heatmap_legend=F,
          row_split=paste(hm_df$direction, "LMRs", sep="\n"),
          column_split=bin_df$experiment, column_title=unique(bin_df$experiment),
          color_viridis(., option="inferno", direction=-1L))

hm_str

bin_df <- quant_df_binned_svz %>% 
  distinct(ptime_bin, experiment, most_common)

bin_ctypes <- bin_df %>%
  pull(most_common)

ha <- HeatmapAnnotation(celltype = bin_ctypes,
                        col = list(celltype = mycolorscheme2),
                        show_legend=F,
                        simple_anno_size=unit(2,"mm"),
                        show_annotation_name=F)

meth_mtx_ptime_bins <- sapply(bin_df$ptime_bin, function(b) {
  idx <- dplyr::filter(quant_df_binned_svz, ptime_bin == b) %>% pull(sample) %>% unique()
  colMeans(dmr_mtx[idx, hm_df$dmr], na.rm=T)
}) %>%
  magrittr::set_rownames(hm_df$dmr) %>% 
  magrittr::set_colnames(bin_df$ptime_bin)

hm_svz <- meth_mtx_ptime_bins %>%
  t() %>% scale() %>% t() %>%  
  Heatmap(cluster_rows=F, show_row_dend=F, cluster_row_slices=F, cluster_columns=F, use_raster=F,
          show_row_names=F, show_column_names=F,
          name="meth", bottom_annotation=ha,
          width=unit(ncol(meth_mtx_ptime_bins) / 7, "cm"), height=unit(nrow(meth_mtx_ptime_bins) / 25, "cm"), show_heatmap_legend=F,
          row_split=paste(hm_df$direction, "LMRs", sep="\n"),
          column_split=bin_df$experiment, column_title=unique(bin_df$experiment),
          color_viridis(., option="inferno", direction=-1L))

hm_svz

p <- (p_selection + p_volcano + plot_layout(widths=c(6, 10)) ) /
 grid.grabExpr(draw(hm_svz)) +
 grid.grabExpr(draw(hm_str)) +
 plot_layout(heights=c(2, 5, 5))

svglite("/home/rstudio/23-02-10_2dpi-lineage-DMRs.svg", width=unit(6,"cm"), height=unit(15,"cm"), fix_text_size=F)
show(p)
## Warning: Removed 7 rows containing missing values (geom_point).
dev.off()
## png 
##   2

DMRs found in the vSVZ lineage after 21 days of ischemia

g1 <- lmr_df %>% 
  dplyr::filter(treatment == "naive" & celltype_tissue %in% c("qNSC2", "aNSC", "TAP") &
                tissue == "vSVZ" & genotype == "wt" & !is.na(meth_score)) %>% 
  pull(sample)

g2 <- lmr_df %>% 
  dplyr::filter(treatment == "21 dpi" & celltype_tissue %in% c("qNSC2", "aNSC", "TAP") &
                tissue == "vSVZ" & genotype == "wt" & !is.na(meth_score)) %>% 
  pull(sample)

lmr_df %>% 
  dplyr::filter(genotype != "Ifnagr KO") %>% 
  slice_sample(prop=1) %>%  # randomize overplotting ...
  arrange(!is.na(meth_score)) %>% # ... but put informative cells on top
  mutate(selection = case_when(
    is.na(meth_score) ~ "NA",
    sample %in% g1 ~ "g1",
    sample %in% g2 ~ "g2",
    T ~ "-"
  )) %>% 
  ggplot(mapping=aes(x=UMAP_1, y=UMAP_2, fill=selection, color=selection)) +
  geom_point_rast(size=.2, data=integrated_df_10X, color="gray", fill="gray") +  # 0.1
  geom_point_rast(size=1.5, stroke=.2, shape=21) +  # 0.5
  facet_grid(tissue ~ experiment) +
  coord_fixed() +
  labs(x = "UMAP1", y = "UMAP2") +
  scale_fill_manual(values=c("g1"="red", "g2"="blue", "NA"="gray40", "-"="black")) +
  scale_color_manual(values=c("g1"="black", "g2"="black", "NA"="gray40", "-"="black")) +
  theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
        plot.background = element_blank(), strip.background = element_blank(),
        panel.border = element_rect(colour="black", fill=NA, size=1))

p_selection <- lmr_df %>% 
  dplyr::filter(tissue == "vSVZ" & experiment %in% c("naive", "21 dpi")) %>% 
  slice_sample(prop=1) %>%  # randomize overplotting ...
  arrange(!is.na(meth_score)) %>% # ... but put informative cells on top
  mutate(selection = case_when(
    is.na(meth_score) ~ "NA",
    sample %in% g1 ~ "g1",
    sample %in% g2 ~ "g2",
    T ~ "-"
  )) %>% 
  ggplot(mapping=aes(x=UMAP_1, y=UMAP_2, fill=selection, color=selection)) +
  geom_point_rast(size=.2, data=integrated_df_10X, color="gray", fill="gray") +  # 0.1
  geom_point_rast(size=1.5, stroke=.2, shape=21) +  # 0.5
  facet_grid(tissue ~ experiment) +
  coord_fixed() +
  labs(x = "UMAP1", y = "UMAP2") +
  scale_fill_manual(values=c("g1"="red", "g2"="blue", "NA"="gray40", "-"="black")) +
  scale_color_manual(values=c("g1"="black", "g2"="black", "NA"="gray40", "-"="black")) +
  theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
        plot.background = element_blank(), strip.background = element_blank(),
        panel.border = element_rect(colour="black", fill=NA, size=1), legend.position="none")

p_selection

# Write this file as input to scbs scan
# bind_rows(
#   dna_info %>% dplyr::filter(sample %in% g1) %>% mutate(group = "q2toTAP_naive"),
#   dna_info %>% dplyr::filter(sample %in% g2) %>% mutate(group = "q2toTAP_2dpi")
# ) %>% 
#   mutate(cell_id_dna = paste0(cell_id_dna, "_R1R2_dedup.NOMe.CpG")) %>% 
#   dplyr::select(cell_id_dna, group) %>% 
#   write_csv("/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/q2toTAP-naive-vs-21dpi_labels.csv",
#             col_names = F)
dmr_df <- read_tsv("/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/diff_q2toTAP-naive-vs-21dpi_bw2000_ss100_t0.05_mc6_annotated.tsv",
                   show_col_types=F)

dmr_df_labels <- dmr_df %>%
  dplyr::filter(padj < .05 & overlaps_gene) %>% #  & n_obs_fg > 25 & n_obs_bg > 25
  slice_min(p, n=10) %>% 
  arrange(p)

p_volcano <- dmr_df %>%
  dplyr::filter(overlaps_gene) %>% #  & n_obs_fg > 25 & n_obs_bg > 25
  ggplot(aes(x = -diff, y = -log10(p), color = padj < .05, label = gene_symbol)) +
  geom_point_rast(size=.3) +
  geom_text_repel(data=dmr_df_labels, max.overlaps=Inf, size=3, color="black",
                  segment.angle=20, segment.curvature=-0.1, segment.ncp=3, segment.color="gray50", segment.size=.3) +
  scale_x_continuous(labels = scales::percent_format(accuracy=1), minor_breaks = NULL,
                     limits=volcano_xlims) +
  scale_y_continuous(limits=volcano_ylims) +
  labs(x = "difference in DNA methylation", y = "-log10(p-value)", color = "") +
  theme(panel.grid.minor = element_blank(), panel.border = element_rect(colour="black", fill=NA, size=1),
        legend.position = "none")

p_volcano
## Warning: Removed 7 rows containing missing values (geom_point).

hm_df <- dmr_df %>% 
  dplyr::filter(padj < .05) %>% 
  arrange(direction)

dmr_mtx <- data.table::fread(
    "/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/diff_q2toTAP-naive-vs-21dpi_bw2000_ss100_t0.05_mc6_a0.1-matrices/methylation_fractions.csv.gz",
    data.table = F) %>%
  column_to_rownames(var="V1") %>% 
  as.matrix() %>% 
  magrittr::set_rownames(str_remove(rownames(.), "_R1R2_dedup.NOMe.[CG]p[CG]")) %>% 
  magrittr::extract(rownames(.) %in% names(dna_id_to_cellname), ) %>% 
  magrittr::set_rownames(unname(dna_id_to_cellname[rownames(.)])) %>% 
  magrittr::extract( , hm_df$dmr)

bin_df <- quant_df_binned_str %>% 
  distinct(ptime_bin, experiment, most_common)

bin_ctypes <- bin_df %>%
  pull(most_common)

ha <- HeatmapAnnotation(celltype = bin_ctypes,
                        col = list(celltype = mycolorscheme2),
                        show_legend=F,
                        simple_anno_size=unit(2,"mm"),
                        show_annotation_name=F)

meth_mtx_ptime_bins <- sapply(bin_df$ptime_bin, function(b) {
  idx <- dplyr::filter(quant_df_binned_str, ptime_bin == b) %>% pull(sample) %>% unique()
  colMeans(dmr_mtx[idx, hm_df$dmr], na.rm=T)
}) %>%
  magrittr::set_rownames(hm_df$dmr) %>% 
  magrittr::set_colnames(bin_df$ptime_bin)

hm_str <- meth_mtx_ptime_bins %>%
  t() %>% scale() %>% t() %>%  
  Heatmap(cluster_rows=F, show_row_dend=F, cluster_row_slices=F, cluster_columns=F, use_raster=F,
          show_row_names=F, show_column_names=F,
          name="meth", bottom_annotation=ha,
          width=unit(ncol(meth_mtx_ptime_bins) / 7, "cm"), height=unit(nrow(meth_mtx_ptime_bins) / 25, "cm"), show_heatmap_legend=F,
          row_split=paste(hm_df$direction, "LMRs", sep="\n"),
          column_split=bin_df$experiment, column_title=unique(bin_df$experiment),
          color_viridis(., option="inferno", direction=-1L))

hm_str

bin_df <- quant_df_binned_svz %>% 
  distinct(ptime_bin, experiment, most_common)

bin_ctypes <- bin_df %>%
  pull(most_common)

ha <- HeatmapAnnotation(celltype = bin_ctypes,
                        col = list(celltype = mycolorscheme2),
                        show_legend=F,
                        simple_anno_size=unit(2,"mm"),
                        show_annotation_name=F)

meth_mtx_ptime_bins <- sapply(bin_df$ptime_bin, function(b) {
  idx <- dplyr::filter(quant_df_binned_svz, ptime_bin == b) %>% pull(sample) %>% unique()
  colMeans(dmr_mtx[idx, hm_df$dmr], na.rm=T)
}) %>%
  magrittr::set_rownames(hm_df$dmr) %>% 
  magrittr::set_colnames(bin_df$ptime_bin)

hm_svz <- meth_mtx_ptime_bins %>%
  t() %>% scale() %>% t() %>%  
  Heatmap(cluster_rows=F, show_row_dend=F, cluster_row_slices=F, cluster_columns=F, use_raster=F,
          show_row_names=F, show_column_names=F,
          name="meth", bottom_annotation=ha,
          width=unit(ncol(meth_mtx_ptime_bins) / 7, "cm"), height=unit(nrow(meth_mtx_ptime_bins) / 25, "cm"), show_heatmap_legend=F,
          row_split=paste(hm_df$direction, "LMRs", sep="\n"),
          column_split=bin_df$experiment, column_title=unique(bin_df$experiment),
          color_viridis(., option="inferno", direction=-1L))

hm_svz

p <- (p_selection + p_volcano + plot_layout(widths=c(6, 10)) ) /
 grid.grabExpr(draw(hm_svz)) +
 grid.grabExpr(draw(hm_str)) +
 plot_layout(heights=c(2, 5, 5))

svglite("/home/rstudio/23-02-10_21dpi-lineage-DMRs.svg", width=unit(6,"cm"), height=unit(15,"cm"), fix_text_size=F)
show(p)
## Warning: Removed 7 rows containing missing values (geom_point).
dev.off()
## png 
##   2

DMRs found in the non-neurogenic reactive astrocytes 2dpi (vSVZ + Striatum combined)

g1 <- lmr_df %>% 
  dplyr::filter(treatment == "naive" & celltype_tissue %in% c("astrocyte (striatum)", "qNSC1") &
                genotype == "wt" & !is.na(meth_score)) %>% 
  pull(sample)

g2 <- lmr_df %>% 
  dplyr::filter(treatment == "2 dpi" & celltype_tissue %in% c("reactive astrocyte") &
                genotype == "wt" & !is.na(meth_score)) %>% 
  pull(sample)

lmr_df %>% 
  dplyr::filter(genotype != "Ifnagr KO") %>% 
  slice_sample(prop=1) %>%  # randomize overplotting ...
  arrange(!is.na(meth_score)) %>% # ... but put informative cells on top
  mutate(selection = case_when(
    is.na(meth_score) ~ "NA",
    sample %in% g1 ~ "g1",
    sample %in% g2 ~ "g2",
    T ~ "-"
  )) %>% 
  ggplot(mapping=aes(x=UMAP_1, y=UMAP_2, fill=selection, color=selection)) +
  geom_point_rast(size=.2, data=integrated_df_10X, color="gray", fill="gray") +  # 0.1
  geom_point_rast(size=1.5, stroke=.2, shape=21) +  # 0.5
  facet_grid(tissue ~ experiment) +
  coord_fixed() +
  labs(x = "UMAP1", y = "UMAP2") +
  scale_fill_manual(values=c("g1"="red", "g2"="blue", "NA"="gray40", "-"="black")) +
  scale_color_manual(values=c("g1"="black", "g2"="black", "NA"="gray40", "-"="black")) +
  theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
        plot.background = element_blank(), strip.background = element_blank(),
        panel.border = element_rect(colour="black", fill=NA, size=1))

p_selection <- lmr_df %>% 
  dplyr::filter(experiment %in% c("naive", "2 dpi")) %>% 
  slice_sample(prop=1) %>%  # randomize overplotting ...
  arrange(!is.na(meth_score)) %>% # ... but put informative cells on top
  mutate(selection = case_when(
    is.na(meth_score) ~ "NA",
    sample %in% g1 ~ "g1",
    sample %in% g2 ~ "g2",
    T ~ "-"
  ),
  tissue = "vSVZ + striatum") %>% 
  ggplot(mapping=aes(x=UMAP_1, y=UMAP_2, fill=selection, color=selection)) +
  geom_point_rast(size=.2, data=integrated_df_10X, color="gray", fill="gray") +  # 0.1
  geom_point_rast(size=1.5, stroke=.2, shape=21) +  # 0.5
  facet_grid(tissue ~ experiment) +
  coord_fixed() +
  labs(x = "UMAP1", y = "UMAP2") +
  scale_fill_manual(values=c("g1"="red", "g2"="blue", "NA"="gray40", "-"="black")) +
  scale_color_manual(values=c("g1"="black", "g2"="black", "NA"="gray40", "-"="black")) +
  theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
        plot.background = element_blank(), strip.background = element_blank(),
        panel.border = element_rect(colour="black", fill=NA, size=1), legend.position="none")

p_selection

# Write this file as input to scbs scan
# bind_rows(
#   dna_info %>% dplyr::filter(sample %in% g1) %>% mutate(group = "q1-SVZStr-naive"),
#   dna_info %>% dplyr::filter(sample %in% g2) %>% mutate(group = "RA-SVZStr-2dpi")
# ) %>%
#   mutate(cell_id_dna = paste0(cell_id_dna, "_R1R2_dedup.NOMe.CpG")) %>%
#   dplyr::select(cell_id_dna, group) %>%
#   write_csv("/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/q1-SVZStr-naive-vs-RA-SVZStr-2dpi_labels.csv",
#             col_names = F)
dmr_df <- read_tsv("/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/diff_q1-SVZStr-naive-vs-RA-SVZStr-2dpi_bw2000_ss100_t0.05_mc6_annotated.tsv",
                   show_col_types=F)

dmr_df_labels <- dmr_df %>%
  dplyr::filter(padj < .05 & overlaps_gene) %>% #  & n_obs_fg > 25 & n_obs_bg > 25
  slice_min(p, n=10) %>% 
  arrange(p)

p_volcano <- dmr_df %>%
  dplyr::filter(overlaps_gene) %>% #  & n_obs_fg > 25 & n_obs_bg > 25
  ggplot(aes(x = -diff, y = -log10(p), color = padj < .05, label = gene_symbol)) +
  geom_point_rast(size=.3) +
  geom_text_repel(data=dmr_df_labels, max.overlaps=Inf, size=3, color="black",
                  segment.angle=20, segment.curvature=-0.1, segment.ncp=3, segment.color="gray50", segment.size=.3) +
  scale_x_continuous(labels = scales::percent_format(accuracy=1), minor_breaks = NULL,
                     limits=volcano_xlims) +
  scale_y_continuous(limits=volcano_ylims) +
  labs(x = "difference in DNA methylation", y = "-log10(p-value)", color = "") +
  theme(panel.grid.minor = element_blank(), panel.border = element_rect(colour="black", fill=NA, size=1),
        legend.position = "none")

p_volcano
## Warning: Removed 2 rows containing missing values (geom_point).

hm_df <- dmr_df %>% 
  dplyr::filter(padj < .05) %>% 
  arrange(direction)

dmr_mtx <- data.table::fread(
    "/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/diff_q1-SVZStr-naive-vs-RA-SVZStr-2dpi_bw2000_ss100_t0.05_mc6_a0.1-matrices/methylation_fractions.csv.gz",
    data.table = F) %>%
  column_to_rownames(var="V1") %>% 
  as.matrix() %>% 
  magrittr::set_rownames(str_remove(rownames(.), "_R1R2_dedup.NOMe.[CG]p[CG]")) %>% 
  magrittr::extract(rownames(.) %in% names(dna_id_to_cellname), ) %>% 
  magrittr::set_rownames(unname(dna_id_to_cellname[rownames(.)])) %>% 
  magrittr::extract( , hm_df$dmr)

bin_df <- quant_df_binned_str %>% 
  distinct(ptime_bin, experiment, most_common)

bin_ctypes <- bin_df %>%
  pull(most_common)

ha <- HeatmapAnnotation(celltype = bin_ctypes,
                        col = list(celltype = mycolorscheme2),
                        show_legend=F,
                        simple_anno_size=unit(2,"mm"),
                        show_annotation_name=F)

meth_mtx_ptime_bins <- sapply(bin_df$ptime_bin, function(b) {
  idx <- dplyr::filter(quant_df_binned_str, ptime_bin == b) %>% pull(sample) %>% unique()
  colMeans(dmr_mtx[idx, hm_df$dmr], na.rm=T)
}) %>%
  magrittr::set_rownames(hm_df$dmr) %>% 
  magrittr::set_colnames(bin_df$ptime_bin)

hm_str <- meth_mtx_ptime_bins %>%
  t() %>% scale() %>% t() %>%  
  Heatmap(cluster_rows=F, show_row_dend=F, cluster_row_slices=F, cluster_columns=F, use_raster=F,
          show_row_names=F, show_column_names=F,
          name="meth", bottom_annotation=ha,
          width=unit(ncol(meth_mtx_ptime_bins) / 7, "cm"), height=unit(nrow(meth_mtx_ptime_bins) / 25, "cm"), show_heatmap_legend=F,
          row_split=paste(hm_df$direction, "LMRs", sep="\n"),
          column_split=bin_df$experiment, column_title=unique(bin_df$experiment),
          color_viridis(., option="inferno", direction=-1L))

hm_str

bin_df <- quant_df_binned_svz %>% 
  distinct(ptime_bin, experiment, most_common)

bin_ctypes <- bin_df %>%
  pull(most_common)

ha <- HeatmapAnnotation(celltype = bin_ctypes,
                        col = list(celltype = mycolorscheme2),
                        show_legend=F,
                        simple_anno_size=unit(2,"mm"),
                        show_annotation_name=F)

meth_mtx_ptime_bins <- sapply(bin_df$ptime_bin, function(b) {
  idx <- dplyr::filter(quant_df_binned_svz, ptime_bin == b) %>% pull(sample) %>% unique()
  colMeans(dmr_mtx[idx, hm_df$dmr], na.rm=T)
}) %>%
  magrittr::set_rownames(hm_df$dmr) %>% 
  magrittr::set_colnames(bin_df$ptime_bin)

hm_svz <- meth_mtx_ptime_bins %>%
  t() %>% scale() %>% t() %>%  
  Heatmap(cluster_rows=F, show_row_dend=F, cluster_row_slices=F, cluster_columns=F, use_raster=F,
          show_row_names=F, show_column_names=F,
          name="meth", bottom_annotation=ha,
          width=unit(ncol(meth_mtx_ptime_bins) / 7, "cm"), height=unit(nrow(meth_mtx_ptime_bins) / 25, "cm"), show_heatmap_legend=F,
          row_split=paste(hm_df$direction, "LMRs", sep="\n"),
          column_split=bin_df$experiment, column_title=unique(bin_df$experiment),
          color_viridis(., option="inferno", direction=-1L))

hm_svz

p <- (p_selection + p_volcano + plot_layout(widths=c(6, 10)) ) /
 grid.grabExpr(draw(hm_svz)) +
 grid.grabExpr(draw(hm_str)) +
 plot_layout(heights=c(2, 5, 5))

svglite("/home/rstudio/23-02-10_RA-DMRs.svg", width=unit(6,"cm"), height=unit(15,"cm"), fix_text_size=F)
show(p)
## Warning: Removed 2 rows containing missing values (geom_point).
dev.off()
## png 
##   2

“Control”: DMRs found in the non-neurogenic reactive astrocytes 2dpi (vSVZ + Striatum combined)

g1 <- lmr_df %>% 
  dplyr::filter(treatment == "naive" & celltype_tissue %in% c("qNSC2", "aNSC", "TAP") &
                tissue == "vSVZ" & genotype == "wt" & !is.na(meth_score)) %>% 
  pull(sample)

g2 <- lmr_df %>% 
  dplyr::filter(treatment == "2 dpi" & celltype_tissue %in% c("qNSC2", "aNSC", "TAP") &
                tissue == "vSVZ" & genotype == "Ifnagr KO" & !is.na(meth_score)) %>% 
  pull(sample)

lmr_df %>% 
  slice_sample(prop=1) %>%  # randomize overplotting ...
  arrange(!is.na(meth_score)) %>% # ... but put informative cells on top
  mutate(selection = case_when(
    is.na(meth_score) ~ "NA",
    sample %in% g1 ~ "g1",
    sample %in% g2 ~ "g2",
    T ~ "-"
  )) %>% 
  ggplot(mapping=aes(x=UMAP_1, y=UMAP_2, fill=selection, color=selection)) +
  geom_point_rast(size=.2, data=integrated_df_10X, color="gray", fill="gray") +  # 0.1
  geom_point_rast(size=1.5, stroke=.2, shape=21) +  # 0.5
  facet_grid(tissue ~ experiment) +
  coord_fixed() +
  labs(x = "UMAP1", y = "UMAP2") +
  scale_fill_manual(values=c("g1"="red", "g2"="blue", "NA"="gray40", "-"="black")) +
  scale_color_manual(values=c("g1"="black", "g2"="black", "NA"="gray40", "-"="black")) +
  theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
        plot.background = element_blank(), strip.background = element_blank(),
        panel.border = element_rect(colour="black", fill=NA, size=1))

p_selection <- lmr_df %>% 
  dplyr::filter(experiment %in% c("naive", "2 dpi (Ifnagr KO)")) %>% 
  slice_sample(prop=1) %>%  # randomize overplotting ...
  arrange(!is.na(meth_score)) %>% # ... but put informative cells on top
  mutate(selection = case_when(
    is.na(meth_score) ~ "NA",
    sample %in% g1 ~ "g1",
    sample %in% g2 ~ "g2",
    T ~ "-"
  ),
  tissue = "vSVZ + striatum") %>% 
  ggplot(mapping=aes(x=UMAP_1, y=UMAP_2, fill=selection, color=selection)) +
  geom_point_rast(size=.2, data=integrated_df_10X, color="gray", fill="gray") +  # 0.1
  geom_point_rast(size=1.5, stroke=.2, shape=21) +  # 0.5
  facet_grid(tissue ~ experiment) +
  coord_fixed() +
  labs(x = "UMAP1", y = "UMAP2") +
  scale_fill_manual(values=c("g1"="red", "g2"="blue", "NA"="gray40", "-"="black")) +
  scale_color_manual(values=c("g1"="black", "g2"="black", "NA"="gray40", "-"="black")) +
  theme(panel.grid = element_blank(), axis.ticks = element_blank(), axis.text = element_blank(),
        plot.background = element_blank(), strip.background = element_blank(),
        panel.border = element_rect(colour="black", fill=NA, size=1), legend.position="none")

p_selection

# Write this file as input to scbs scan
# bind_rows(
#   dna_info %>% dplyr::filter(sample %in% g1) %>% mutate(group = "q1-SVZStr-naive"),
#   dna_info %>% dplyr::filter(sample %in% g2) %>% mutate(group = "RA-SVZStr-2dpi")
# ) %>%
#   mutate(cell_id_dna = paste0(cell_id_dna, "_R1R2_dedup.NOMe.CpG")) %>%
#   dplyr::select(cell_id_dna, group) %>%
#   write_csv("/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/q1-SVZStr-naive-vs-RA-SVZStr-2dpi_labels.csv",
#             col_names = F)
dmr_df <- read_tsv("/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/diff_q2toTAP-naive-vs-2dpiKO_bw2000_ss100_t0.05_mc6_annotated.tsv",
                   show_col_types=F)

dmr_df_labels <- dmr_df %>%
  dplyr::filter(padj < .05 & overlaps_gene) %>% #  & n_obs_fg > 25 & n_obs_bg > 25
  slice_min(p, n=10) %>% 
  arrange(p)

p_volcano <- dmr_df %>%
  dplyr::filter(overlaps_gene) %>% #  & n_obs_fg > 25 & n_obs_bg > 25
  ggplot(aes(x = -diff, y = -log10(p), color = padj < .05, label = gene_symbol)) +
  geom_point_rast(size=.3) +
  geom_text_repel(data=dmr_df_labels, max.overlaps=Inf, size=3, color="black",
                  segment.angle=20, segment.curvature=-0.1, segment.ncp=3, segment.color="gray50", segment.size=.3) +
  scale_x_continuous(labels = scales::percent_format(accuracy=1), minor_breaks = NULL,
                     limits=volcano_xlims) +
  scale_y_continuous(limits=volcano_ylims) +
  labs(x = "difference in DNA methylation", y = "-log10(p-value)", color = "") +
  theme(panel.grid.minor = element_blank(), panel.border = element_rect(colour="black", fill=NA, size=1),
        legend.position = "none")

p_volcano
## Warning: Removed 6 rows containing missing values (geom_point).

hm_df <- dmr_df %>% 
  dplyr::filter(padj < .05) %>% 
  arrange(direction)

dmr_mtx <- data.table::fread(
    "/mnt/volume/22-04-11_scNMT-ischemia/DNA_for_figure/08_diff/diff_q2toTAP-naive-vs-2dpiKO_bw2000_ss100_t0.05_mc6_a0.1-matrices/methylation_fractions.csv.gz",
    data.table = F) %>%
  column_to_rownames(var="V1") %>% 
  as.matrix() %>% 
  magrittr::set_rownames(str_remove(rownames(.), "_R1R2_dedup.NOMe.[CG]p[CG]")) %>% 
  magrittr::extract(rownames(.) %in% names(dna_id_to_cellname), ) %>% 
  magrittr::set_rownames(unname(dna_id_to_cellname[rownames(.)])) %>% 
  magrittr::extract( , hm_df$dmr)

bin_df <- quant_df_binned_str %>% 
  distinct(ptime_bin, experiment, most_common)

bin_ctypes <- bin_df %>%
  pull(most_common)

ha <- HeatmapAnnotation(celltype = bin_ctypes,
                        col = list(celltype = mycolorscheme2),
                        show_legend=F,
                        simple_anno_size=unit(2,"mm"),
                        show_annotation_name=F)

meth_mtx_ptime_bins <- sapply(bin_df$ptime_bin, function(b) {
  idx <- dplyr::filter(quant_df_binned_str, ptime_bin == b) %>% pull(sample) %>% unique()
  colMeans(dmr_mtx[idx, hm_df$dmr], na.rm=T)
}) %>%
  magrittr::set_rownames(hm_df$dmr) %>% 
  magrittr::set_colnames(bin_df$ptime_bin)

hm_str <- meth_mtx_ptime_bins %>%
  t() %>% scale() %>% t() %>%  
  Heatmap(cluster_rows=F, show_row_dend=F, cluster_row_slices=F, cluster_columns=F, use_raster=F,
          show_row_names=F, show_column_names=F,
          name="meth", bottom_annotation=ha,
          width=unit(ncol(meth_mtx_ptime_bins) / 7, "cm"), height=unit(nrow(meth_mtx_ptime_bins) / 25, "cm"), show_heatmap_legend=F,
          row_split=paste(hm_df$direction, "LMRs", sep="\n"),
          column_split=bin_df$experiment, column_title=unique(bin_df$experiment),
          color_viridis(., option="inferno", direction=-1L))

hm_str

bin_df <- quant_df_binned_svz %>% 
  distinct(ptime_bin, experiment, most_common)

bin_ctypes <- bin_df %>%
  pull(most_common)

ha <- HeatmapAnnotation(celltype = bin_ctypes,
                        col = list(celltype = mycolorscheme2),
                        show_legend=F,
                        simple_anno_size=unit(2,"mm"),
                        show_annotation_name=F)

meth_mtx_ptime_bins <- sapply(bin_df$ptime_bin, function(b) {
  idx <- dplyr::filter(quant_df_binned_svz, ptime_bin == b) %>% pull(sample) %>% unique()
  colMeans(dmr_mtx[idx, hm_df$dmr], na.rm=T)
}) %>%
  magrittr::set_rownames(hm_df$dmr) %>% 
  magrittr::set_colnames(bin_df$ptime_bin)

hm_svz <- meth_mtx_ptime_bins %>%
  t() %>% scale() %>% t() %>%  
  Heatmap(cluster_rows=F, show_row_dend=F, cluster_row_slices=F, cluster_columns=F, use_raster=F,
          show_row_names=F, show_column_names=F,
          name="meth", bottom_annotation=ha,
          width=unit(ncol(meth_mtx_ptime_bins) / 7, "cm"), height=unit(nrow(meth_mtx_ptime_bins) / 25, "cm"), show_heatmap_legend=F,
          row_split=paste(hm_df$direction, "LMRs", sep="\n"),
          column_split=bin_df$experiment, column_title=unique(bin_df$experiment),
          color_viridis(., option="inferno", direction=-1L))

hm_svz

p <- (p_selection + p_volcano + plot_layout(widths=c(6, 10)) ) /
 grid.grabExpr(draw(hm_svz)) +
 grid.grabExpr(draw(hm_str)) +
 plot_layout(heights=c(2, 5, 5))

svglite("/home/rstudio/23-02-10_2dpiKO-DMRs.svg", width=unit(6,"cm"), height=unit(15,"cm"), fix_text_size=F)
show(p)
## Warning: Removed 6 rows containing missing values (geom_point).
dev.off()
## png 
##   2